##----- Load AREPA
source("arepa.R")

##----- Variables to set

years <- 1990:2001 # possible range 1990-2013 
within_km <- 9.656# 6 miles #####   4.828 #3 miles #####   
observation_percent <- 67 # as per EPA suggestion
parameter_code <- 81102 # for PM 10

##----- Get PM10 annual data and set unique Monitor key

# Downoald AQS data (will use most updated version on EPA website)
# get_AQS_data_annual(years)

list_PM <- lapply(years, load_annual_average)
list_PM_10 <- lapply(list_PM,
                     function(dataset) subset(dataset, Parameter.Code == parameter_code))
PM <- rbindlist(list_PM_10)

# Create monitor ID
PM[, Monitor := paste(sprintf("%02d", as.numeric(State.Code)),
                      sprintf("%03d", as.numeric(County.Code)),
                      "-",
                      sprintf("%04d", as.numeric(Site.Num)), sep = "")]

##----- Unique monitors: 3047
# unique(PM, by = "Monitor")

##----- Remove MICROSCALE monitors and valid days >= Valid.Day.Count

PM <- subset_monitors(MONITORS = PM,
                      parameter_code = parameter_code,
                      observation_percent = observation_percent)

##----- Zip codes

ZIP <- get_zip_codes()
  
##----- Spatial index to join monitors and Zip codes.  May take a couple of minutes

Link_PM_Zip_Index <- spatial_link_index(PM, "Latitude", "Longitude", "Monitor",
                                        ZIP, "Latitude.zip", "Longitude.zip", "zip",
                                        within = within_km)

##----- Assign every zip code to its closest monitor

Link_PM_Zip_Index_Unique <- copy(Link_PM_Zip_Index)
Link_PM_Zip_Index_Unique[, Include.in.Average := 0]
setkeyv(Link_PM_Zip_Index_Unique, c("Distance"))
setkeyv(Link_PM_Zip_Index_Unique, c("zip"))
Link_PM_Zip_Index_Unique[Link_PM_Zip_Index_Unique[, .I[1], by = key(Link_PM_Zip_Index_Unique)]$V1, 
                         Include.in.Average := 1]

Link_PM_Zip_Index_Unique <- subset(Link_PM_Zip_Index_Unique, Include.in.Average == 1)
Link_PM_Zip_Index_Unique[, Include.in.Average := NULL]

##----- Aggregate (real or simulated) Medicare data by monitor
##----- simulated data
MEDICARE <- fread("Medicare_annual_PM10_zipcode_simulated_6mile.csv")[, V1 := NULL]

MEDICARE$Year = MEDICARE$year

MEDICARE <- MEDICARE[!is.na(Year)]
MEDICARE <- MEDICARE[!is.na("Monitor")]

MEDICARE <- MEDICARE[, list(mean_age = mean(mean_age, na.rm = TRUE),
                            Female_rate = mean(Female_rate, na.rm = TRUE),
                            White_rate = mean(White_rate, na.rm = TRUE),
                            Black_rate = mean(Black_rate, na.rm = TRUE),
                            Total_death = sum(Total_death, na.rm = TRUE),
                            Tot_Beneficiary = sum(Tot_Beneficiary, na.rm = TRUE),
                            pyear = sum(pyear, na.rm = TRUE),
                            ALL_CVD = sum(ALL_CVD, na.rm = TRUE),
                            AMI = sum(AMI, na.rm = TRUE),
                            COPD = sum(COPD, na.rm = TRUE),
                            CV_stroke = sum(CV_stroke, na.rm = TRUE),
                            HF = sum(HF, na.rm = TRUE),
                            HRD = sum(HRD, na.rm = TRUE),
                            IHD = sum(IHD, na.rm = TRUE),
                            PVD = sum(PVD, na.rm = TRUE),
                            RTI = sum(RTI, na.rm = TRUE),
                            HRP = sum(HRP, na.rm = TRUE),
                            Respiratory = sum(Respiratory, na.rm = TRUE),
                            Nb_zip = length(Zipcode_listed_R)),
                     by = c("Year", "Monitor")]

##----- Merge back

PM_MEDICARE_LINKED <- merge(PM, MEDICARE, by = c("Monitor", "Year"), all = TRUE, allow.cartesian = TRUE)
names(PM_MEDICARE_LINKED)

##----- If using fake data
write.csv(PM_MEDICARE_LINKED, "Merge_6mile.csv")


